secret_password = "secret"
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def summarize(dataframe):
cols = dataframe.columns
print("*** Dataframe info ***", end = "\n\n")
dataframe.info()
print()
print("*** Column Unique Values ***", end = "\n\n")
for i in range(len(cols)):
print(f"{cols[i]} values: {dataframe[cols[i]].unique()}", end = "\n\n")
df = pd.read_csv("data/PrincipalCausesofDeath.csv")
summarize(df)
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 172 entries, 0 to 171 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 172 non-null int64 1 rank 172 non-null int64 2 icd 172 non-null object 3 classification 172 non-null object 4 disease_condition 172 non-null object 5 percentage_deaths 172 non-null float64 dtypes: float64(1), int64(2), object(3) memory usage: 8.2+ KB *** Column Unique Values *** year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022] rank values: [ 1 2 3 4 5 6 7 8 9 10] icd values: ['ICD9' 'ICD10'] classification values: ['140-208' '410-414' '480-486' '430-438' 'E800-E999' '393-398, 402, 415-429' '250' '490-493, 496' '599' '580-589' 'C00-C97' 'J12-J18' 'I20-I25' 'I60-I69' 'V01-Y89' 'I10-I15' 'N00-N07, N17-N19, N25-N27' 'N39.0' 'J40-J44' 'I00-I09, I26-I51' 'E10-E14' 'JE10-E14'] disease_condition values: ['Cancer' 'Ischaemic Heart Disease' 'Pneumonia' 'Cerebrovascular Disease (including stroke)' 'Accidents, Poisoning & Violence' 'Other Heart Diseases' 'Diabetes Mellitus' 'Chronic Obstructive Lung Disease' 'Urinary Tract Infection' 'Nephritis, Nephrotic Syndrome & Nephrosis' 'Ischaemic heart diseases' 'Cerebrovascular diseases (including stroke)' 'External causes of morbidity and mortality' 'Hypertensive diseases (including hypertensive heart disease)' 'Nephritis, nephrotic syndrome & nephrosis' 'Urinary tract infection' 'Chronic obstructive lung disease' 'Other heart diseases'] percentage_deaths values: [28.5 18.5 13.7 8.9 6.3 4.3 3.3 2. 1.7 27.7 19.8 13.9 8.7 6. 3.6 2.6 2.2 29.3 20.1 8.3 5.8 4. 2.7 2.5 2.1 19.2 15.3 8. 5.7 4.4 2.4 2.3 18.7 15.7 8.4 5.5 4.8 1. 30. 16.4 16. 9. 5. 30.1 16.8 16.1 9.3 5.6 2.8 1.9 30.5 15.5 4.9 3.1 1.6 29.4 19. 4.7 1.8 29.7 19.4 16.7 6.8 4.5 3.9 29.6 19.3 17. 6.6 29.1 3.4 1.5 28.8 20.6 18.1 3. 1.3 28.4 20.7 18.8 1.4 28.6 20.5 3.7 2.9 1.2 26.4 18.4 6.1 1.1 23.9 20. 19.7]
fig, ax = plt.subplots(figsize=(20,12))
df = df[df["year"] >= 2016]
ax.set_ylim(0,45)
ax.grid()
sns.set(style='white')
sns.barplot(x="year",y="percentage_deaths",hue="disease_condition",data=df)
plt.xlabel("Year")
plt.ylabel("Percentage Deaths by Disease (%)")
plt.legend(title="Diseases", loc="upper right")
plt.show()
df_cancer = pd.read_csv("data/AgeStandardisedMortalityRateforCancer.csv")
df_ihd = pd.read_csv("data/AgeStandardisedMortalityRateforIschaemicHeartDisease.csv")
df_stroke = pd.read_csv("data/AgeStandardisedMortalityRateforStroke.csv")
summarize(df_cancer)
summarize(df_ihd)
summarize(df_stroke)
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 30 entries, 0 to 29 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 30 non-null int64 1 cancer 30 non-null float64 dtypes: float64(1), int64(1) memory usage: 612.0 bytes *** Column Unique Values *** year values: [1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019] cancer values: [244.5 241. 240.8 231.8 240.7 230.8 227.2 223. 218.5 207.3 199.6 204.2 198.6 184.9 180.1 169.3 170. 165.2 157.6 153.2 157.5 154.9 155.1 143.9 141.9 135.9 134.2 130.9 122.2] *** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 30 entries, 0 to 29 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 30 non-null int64 1 ihd 30 non-null float64 dtypes: float64(1), int64(1) memory usage: 612.0 bytes *** Column Unique Values *** year values: [1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019] ihd values: [178.9 165.5 165.8 157. 158.5 147.2 147.8 145.7 142.1 136.1 122.3 125.6 112.5 104.9 97. 94. 86.6 93.6 93.4 84.5 77.2 69.6 67. 61.6 61.4 62. 61. 65.6 63.9 62.6] *** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 30 entries, 0 to 29 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 30 non-null int64 1 stroke 30 non-null float64 dtypes: float64(1), int64(1) memory usage: 612.0 bytes *** Column Unique Values *** year values: [1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019] stroke values: [95.8 95.5 83.7 83.1 81.2 78.2 74.7 66.7 61.4 55.3 53.4 44.4 42.2 41.7 40.8 37.3 32.9 30.8 30.2 26.3 28.1 28.2 28.9 25.4 23.5 19.9 19.6 18.4 18.2 16.4]
df = pd.concat([df_cancer,df_ihd.drop(columns="year"),df_stroke.drop(columns="year")], axis=1)
df.head()
sns.lineplot(data=df,x="year",y="cancer",label="cancer")
sns.lineplot(data=df,x="year",y="ihd",label="ihd")
sns.lineplot(data=df,x="year",y="stroke",label="stroke")
plt.grid(linestyle="--")
plt.ylim(0,300)
plt.xlabel("Year")
plt.ylabel("Age Standardised Mortality Rate")
Text(0, 0.5, 'Age Standardised Mortality Rate')
df = pd.read_csv("data/NumberofGraduatesinHealthcareSpecialisationsbyCourse.csv")
df = df[df["no_of_graduates"] != "na"]
df = df.astype({'no_of_graduates': 'int64'})
summarize(df)
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> Index: 94 entries, 0 to 95 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 94 non-null int64 1 graduate_type 94 non-null object 2 no_of_graduates 94 non-null int64 dtypes: int64(2), object(1) memory usage: 2.9+ KB *** Column Unique Values *** year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021] graduate_type values: ['Medical' 'Nursing' 'Allied Health Professionals' 'Dentistry' 'Pharmacy' 'Oral Health Therapist'] no_of_graduates values: [ 229 1140 115 30 81 227 1446 188 36 86 1538 151 35 22 87 219 1523 124 31 15 98 228 1619 141 43 21 118 279 1665 163 42 107 290 1744 257 123 303 1641 206 48 138 312 1564 222 45 302 1479 51 17 150 328 1558 210 57 23 165 353 1536 56 24 198 423 1668 241 52 0 183 437 1818 89 448 1958 456 2102 252 58]
df = df[df["year"] >= 2017]
fig, ax = plt.subplots(figsize=(15,9))
sns.set(style='white')
sns.barplot(x="year",y="no_of_graduates",hue="graduate_type",data=df)
ax.grid(linestyle="--")
ax.set_ylim(0,2500)
plt.xlabel("Year")
plt.ylabel("Number of graduates")
plt.legend(title="Healthcare Professionals", loc="upper left")
plt.show()
df = pd.read_csv("data/Top4ConditionsofPolyclinicAttendances.csv")
summarize(df)
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 64 entries, 0 to 63 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 64 non-null int64 1 condition 64 non-null object 2 percentage_diagnoses 64 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 1.6+ KB *** Column Unique Values *** year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021] condition values: ['Hyperlipidemia' 'Hypertensive Disease' 'Acute Upper Respiratory Tract Infection including Influenza' 'Diabetes Mellitus'] percentage_diagnoses values: [13.8 17.2 11.9 8.6 14.9 17.3 11.6 9. 15.5 10.8 9.1 15.8 16.6 11.1 15.6 15.9 11.2 8.9 15.7 10.6 8.8 16. 10.5 16.2 9.6 9.2 15. 9.3 14.2 14.4 13.7 13.2 9.4 13.3 15.1 4.7 15.2 14.5 10.3 3.6]
condition_dict = { x:[] for x in list(df["condition"].unique()) }
for i in range(0, len(df["percentage_diagnoses"])):
row = df.iloc[i,]
condition_dict[row["condition"]].append(row["percentage_diagnoses"])
width = 0.5
fig, ax = plt.subplots(figsize=(12,6))
bottom = np.zeros(16, dtype=float)
for disease, values in condition_dict.items():
p = ax.bar(list(df["year"].unique()), values, width, label=disease, bottom=bottom)
bottom += values
ax.grid(linestyle="--")
ax.set_ylim(0,70)
ax.set_xlabel("Year")
ax.set_ylabel("Percentage diagnosis")
ax.legend(loc="upper right")
plt.show()
df_2021 = df[df["year"] == 2021]
explode = (0.05, 0.05, 0.05, 0.05)
color = ["blue","orange","red","green"]
plt.pie(df_2021["percentage_diagnoses"], labels = df_2021["condition"], autopct='%.2f%%', explode = explode, colors = color, startangle=150)
plt.title("Polyclinic attendance for 4 major illnesses in 2021")
plt.show()
df = pd.read_csv("data/HospitalAdmissionsBySectorAnnual.csv")
summarize(df)
df = df[(df["year"] >= 2006) & (df["level_2"] == "Public") & (df["level_1"] == "Acute Hospitals Admissions")]
df = df.astype({'value': 'int64'})
df.head()
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 222 entries, 0 to 221 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 222 non-null int64 1 level_1 222 non-null object 2 level_2 222 non-null object 3 value 222 non-null object dtypes: int64(1), object(3) memory usage: 7.1+ KB *** Column Unique Values *** year values: [1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020] level_1 values: ['Acute Hospitals Admissions' 'Psychiatric Hospitals Admissions' 'Community Hospitals Admissions'] level_2 values: ['Public' 'Non-public'] value values: ['na' '308016' '92620' '8245' '269' '0' '6229' '317355' '95850' '8417' '288' '7312' '321187' '91013' '8884' '7013' '323535' '90057' '9060' '7612' '333573' '93732' '9759' '7534' '347398' '98986' '9628' '572' '7107' '354076' '105826' '8382' '688' '7063' '364911' '109282' '8110' '1053' '7649' '372661' '116913' '9050' '1467' '7600' '379973' '119916' '8986' '2187' '8483' '416663' '129593' '9028' '5432' '8449' '436882' '131235' '9316' '7361' '8994' '455272' '129547' '9215' '8271' '483939' '134197' '9234' '10215' '9828' '444863' '111648' '8426' '11169' '9679']
| year | level_1 | level_2 | value | |
|---|---|---|---|---|
| 132 | 2006 | Acute Hospitals Admissions | Public | 308016 |
| 138 | 2007 | Acute Hospitals Admissions | Public | 317355 |
| 144 | 2008 | Acute Hospitals Admissions | Public | 321187 |
| 150 | 2009 | Acute Hospitals Admissions | Public | 323535 |
| 156 | 2010 | Acute Hospitals Admissions | Public | 333573 |
fig, ax = plt.subplots(figsize = (10,6))
plt.ylim(275000,550000)
plt.grid()
plt.xlabel("Year")
plt.ylabel("Admissions")
plt.title("Public Acute Hospitals Admissions")
sns.set(style='white')
sns.histplot(
df, x="year", y="value",
bins=30, discrete=(True, False), log_scale=(False, True)
)
<Axes: title={'center': 'Public Acute Hospitals Admissions'}, xlabel='Year', ylabel='Admissions'>
df = pd.read_csv("data/GovernmentTotalExpenditure.csv")
summarize(df)
df = df[df["ministry"] == "Health"]
df.head()
def convert_from_m_to_b(dataframe):
dataframe["amount"] = dataframe["amount"] / 1000
convert_from_m_to_b(df)
fig, ax = plt.subplots(figsize=(8,5))
plt.title("Singapore's Healthcare Expenditure by Year\n")
plt.xlabel("Financial Year")
plt.ylabel("Amount ($ billions)")
plt.grid(linestyle="--")
sns.lineplot(df, x="financial_year",y="amount",hue="type")
plt.legend(title="Expenditure type")
plt.show()
*** Dataframe info ***
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1120 entries, 0 to 1119
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 financial_year 1120 non-null int64
1 actual_revised_estimated 1120 non-null object
2 sector 1120 non-null object
3 ministry 1120 non-null object
4 type 1120 non-null object
5 amount 1120 non-null int64
6 percent_of_gdp 1120 non-null float64
dtypes: float64(1), int64(2), object(4)
memory usage: 61.4+ KB
*** Column Unique Values ***
financial_year values: [1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024]
actual_revised_estimated values: ['Actual' 'Revised' 'Estimated']
sector values: ['Economic Development' 'Government Administration'
'Security and External Relations' 'Social Development']
ministry values: ['Info-Communications and Media Development' 'Manpower'
'Trade and Industry' 'Transport' 'Communications and Information'
'Finance' 'Law' 'Organs of State' "Prime Minister's Office" 'Defence'
'Foreign Affairs' 'Home Affairs' 'Culture, Community and Youth'
'Education' 'Health' 'National Development'
'Social and Family Development' 'Sustainability and the Environment']
type values: ['Development' 'Operating']
amount values: [ 39 94 82 179 23 5 25 33 46 28 48 38
525 278 126 68 296 153 105 4 8 29 53 17
37 0 24 47 42 34 76 88 160 227 267 378
454 490 617 535 731 765 855 1104 836 12 6 18
54 52 36 15 35 22 62 21 65 79 85 127
132 136 91 107 130 156 158 161 166 188 225 224
414 439 380 456 496 520 611 634 645 697 892 4166
4187 3767 1551 1169 2175 2646 2073 1820 1293 1009 1421 1154
1055 1207 1516 2183 2334 2673 2553 2387 1947 2102 2398 2858
2688 3656 3176 2910 3398 3950 4465 4343 341 315 366 412
553 564 497 392 436 473 528 660 704 684 671 685
710 729 783 921 943 993 1116 2509 1986 2757 1603 1629
1346 1737 720 1130 1508 1203 1115 1776 1617 1518 1621 3379
4082 4250 4003 5414 5504 5489 10332 9319 7572 9877 6617 5327
6534 9157 10413 11792 314 442 409 2390 406 385 292 289
277 285 321 367 371 464 502 555 598 953 1607 1900
1945 2945 3738 3692 2550 2447 90 637 240 338 199 57
70 100 214 308 350 10 44 30 72 74 56 162
298 310 339 388 352 379 333 318 345 428 438 484
552 631 711 682 740 824 833 828 852 904 951 1002
1173 1187 435 402 131 213 325 184 329 218 167 123
148 316 273 434 319 377 320 217 311 197 387 89
97 109 120 112 103 96 106 118 119 163 177 174
181 185 196 207 212 263 279 98 251 151 31 11
14 55 59 61 150 135 170 173 186 182 187 195
247 235 249 313 326 384 410 440 508 578 659 693
750 32 41 40 9 20 159 172 250 121 108 134
143 141 176 194 203 259 290 381 374 653 670 746
692 1004 1036 1144 619 418 775 620 861 730 509 524
363 355 349 328 479 468 422 429 431 543 594 482
481 556 609 871 905 6097 6481 6647 6561 7089 7694 7714
8243 8889 9273 9660 10397 10603 10623 10797 11056 11329 11867 12672
13281 13582 13825 13669 13027 14737 16343 18885 19344 81 49 86
83 71 45 19 26 16 180 211 233 257 344 343
360 382 394 447 444 398 466 494 900 847 654 570
478 460 399 396 506 573 831 1129 972 800 1332 1050
1020 793 994 1032 1101 1300 1580 1563 1708 1752 1825 2010
2428 2478 2577 2937 2947 3266 3522 4029 4377 4652 4996 5505
5703 6013 6678 6942 7044 7478 137 139 155 110 95 128
144 27 200 220 234 241 228 294 500 430 336 354
424 517 721 738 779 507 489 300 210 1571 364 1003
1335 1795 1444 1586 1668 1674 1927 2096 2272 1951 2099 1102
1686 1601 1591 1473 1774 1218 1239 867 608 742 753 877
1042 860 973 886 699 657 791 493 305 332 3348 3167
3257 4277 4767 4824 4997 4975 5215 6352 6786 7477 7838 8999
9698 9637 10665 10712 11236 11812 12080 12429 11932 11767 12605 12890
13748 14132 274 140 145 114 485 453 605 723 1147 1413
1619 1465 1490 1404 949 1049 1204 1272 1375 896 992 936
1072 1445 1451 1904 1604 1680 1840 2019 2379 2920 3258 3489
4066 5044 5872 7520 8199 8734 8937 9915 14311 16274 15909 16674
17397 1 2 3 340 469 666 709 706 1028 1027 1029
2291 1562 1797 1888 2003 2172 2069 1770 1089 1865 1153 1010
675 1095 1307 1646 1572 1354 901 1308 1321 1248 1257 1191
926 1110 1221 1135 441 358 805 959 790 1428 673 858
2284 3218 2941 2606 4626 4710 7851 7475 169 138 92 115
64 111 87 93 84 104 124 513 572 814 844 903
962 1182 1565 1829 1721 1739 1598 1722 2121 2389 2459 2608
2835 3673 3592 3653 4057 4561 652 945 771 952 323 427
567 1549 839 1076 708 832 390 448 408 606 656 726
813 958 1125 1201 1295 1540 1832 1848 2420 2834]
percent_of_gdp values: [0. 0.001 0.002 0.008 0.007 0.006 0.015 0.019 0.014 0.011 0.005 0.003
0.004 0.009 0.012 0.013 0.024 0.021 0.016 0.041 0.046 0.044 0.039 0.042
0.038 0.035 0.036 0.032 0.03 0.029 0.028 0.027 0.026 0.01 0.022 0.025
0.023 0.02 0.018 0.017]
import plotly.express as px
df = pd.read_csv("data/GovernmentTotalExpenditure.csv")
df = df[df["type"] == "Operating"]
fig = px.scatter(df, x="percent_of_gdp", y="amount", animation_frame="financial_year", animation_group="ministry",
size="amount", color="ministry", hover_name="ministry",
log_y=True, size_max=120, range_x=[-0.01,0.06], range_y=[10,1000000])
fig.show()
df = pd.read_csv("data/MedisaveAccountsandBalancesAnnual.csv")
df.head()
summarize(df)
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 16 entries, 0 to 15 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 16 non-null int64 1 no_of_accounts 16 non-null int64 2 total_balance 16 non-null float64 3 average_balance 16 non-null int64 4 amount_withdrawn 16 non-null int64 dtypes: float64(1), int64(4) memory usage: 772.0 bytes *** Column Unique Values *** year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021] no_of_accounts values: [2700000 2800000 2900000 3000000 3100000 3200000 3300000 3400000 3500000 3600000 3700000 3800000] total_balance values: [ 36.9 39.3 42.4 45.8 50.2 54.8 60. 64.9 70.5 75.9 82.1 88.6 96.1 102. 110.1 117.6] average_balance values: [13600 14100 14900 15700 16900 18200 19400 20600 21800 22700 24200 25600 26700 27900 29600 31000] amount_withdrawn values: [ 445 517 558 601 678 722 767 798 853 900 931 964 1027 1089 1007 1115]
sns.lmplot(df,x="average_balance", y="amount_withdrawn", hue="year")
plt.title("Relationship between Medisave balance and withdrawal\n")
plt.xlabel("Average Medisave balance $")
plt.ylabel("Amount withdrawn $")
C:\Users\jinji\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
Text(48.59481250000002, 0.5, 'Amount withdrawn $')
sns.lmplot(df,x="average_balance", y="amount_withdrawn")
plt.title("Relationship between Medisave balance and withdrawal\n")
plt.xlabel("Average Medisave balance $")
plt.ylabel("Amount withdrawn $")
C:\Users\jinji\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
Text(20.53125, 0.5, 'Amount withdrawn $')
df = pd.read_csv("data/NumberofPolicyholdersforMediShieldandIntegratedShieldPlansAnnual.csv")
df.tail()
summarize(df)
*** Dataframe info *** <class 'pandas.core.frame.DataFrame'> RangeIndex: 16 entries, 0 to 15 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 16 non-null int64 1 policyholders 16 non-null object 2 policyholders_with_private_plans 16 non-null int64 dtypes: int64(2), object(1) memory usage: 516.0+ bytes *** Column Unique Values *** year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021] policyholders values: ['2760000' '2870000' '3076000' '3299000' '3390000' '3500000' '3541000' '3593000' '3656000' '-'] policyholders_with_private_plans values: [1510000 1660000 1790000 1930000 2068000 2200000 2318000 2404000 2485000 2548000 2619000 2686000 2749000 2801000 2830000 2864000]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mysql.connector
connect = mysql.connector.connect(
user = "root",
password = secret_password,
host = "127.0.0.1",
database = "spdatabase"
)
query = "SELECT * FROM spdatabase.policyholders"
df = pd.read_sql(query, con = connect)
plt.plot(df["year"],df["policyholders_with_private_plans"])
plt.xlabel("Year")
plt.ylabel("Number of policy holders with private plans (millions)")
plt.grid(linestyle="--")
plt.show()
connect.close()
C:\Users\jinji\AppData\Local\Temp\ipykernel_23720\1233250901.py:15: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.